/* This file constructs the main tables and figures in "How Wide is the Firm Border?" 
   The main inputs to this file are produced in an earlier do file, data_compile.do.
   The code is written in a somewhat modular fashion, so that the computation of 
   individual tables or figures can be switched on or off by changing the value of
   the local macro variables below.
     
 */

/* Session setup: start from an empty workspace and raise the size limits
   before any dataset is loaded. */
set more off
clear all
clear matrix
clear mata
set matsize 10000
set maxvar 20000

/* ---------------------------------------------------------------------
   Configuration. Every switch below is 0 (skip) or 1 (run); each one
   guards one block of this do file.
   --------------------------------------------------------------------- */

/* Names of the industry-code variables used for grouping. */
local indvar "naicsup"
local indvardown "naicsdown"

/* Cutoff used when defining which pairs of industries are vertically
   related to one another: 10 means a 1.0 percent cutoff. Other feasible
   values include 20 (2.0 percent cutoff) and 30 (3.0 percent cutoff). */
local pctlink 10

/* Main tables and figures. */
local table23 0      /* part of Table 2 and part of Table 3 */
local table716 0     /* Tables 7 and 16 */
local table1 0       /* Table 1 */
local figure2 0      /* Figure 2 */
local table5 0       /* Table 5 */
local table9 0       /* Table 9: Columns 2-4 */
local table10 0      /* Table 10: Columns 1-3 */
local table_panel 0  /* regressions that add the 2002 absorption ratio as a regressor */

/* Additional computations and robustness checks. */
local compute_specialization 0  /* Number that appears in footnote 8 */
local include_mu_0 0            /* Final column of Table 9 */
local figure_1 0                /* Figure 1 */
local alternate_cluster 0       /* Table 12 */
local shipments_per_est 0       /* Columns 4-6 of Table 10 */
local noexports 0               /* Columns 2-3 of Table 14 */
local countyregs 0              /* Column 5 of Table 14 */
local subsample_big_zips 0      /* Columns 2-3 of Table 11 */

local ten_mile 1                /* Columns 4-6 of Table 2 */
local get_firm_frac_2002 0      /* not referenced in the blocks that precede the shipments_per_est block */
local extensive 1               /* Columns 4-5 of Table 3 */
local additional_panel_regs 1   /* Tables 4 and 8 */
local different_weights 1       /* Table 13 */
local inter_cf 1                /* Table 15 */

/*
define local esampfolder 
define local meaninvnumest - take the mean of the inverse of the number of establishments in the destination zip code 
*/ 

/*
Atalay et al. : The two lines of code, above, were commented out by the Census staff for 
   disclosure avoidance review reasons. This comment, here, is written after this commenting 
   by the Census staff. 
   
   The first of the two lines introduces a folder to which we have saved datasets 
   that were later used to run the "disclosure avoidance review" code. 
   
   The second of the two lines introduces a local macro variable 
   that is equal to the 1/(1+r_{i^e_z}) term first introduced in the second paragraph 
   of Section IV.A of the paper. 
*/

  
if `table23'==1 {
/* Tables 2 and 3 (partial). All regressions use establishment (lbdnum)
   fixed effects on the sample with mu==1, excluding same-zip pairs. */
use analysis_data2007, clear
xtset lbdnum
keep if mu==1 & same_zip!=1

/******************************************************
	Table 2: Columns 1, 2, 3
******************************************************/

xtpoisson absorb_ratio lmileage2_mrW firmfrac_mrW, fe robust
/* Disclosure-review bookkeeping, a pattern that recurs throughout this do
   file: the estimation sample is flagged, written to the esampfolder
   directory, and the flag is then removed. A separate program uses the saved
   file to check that the tables do not disclose information about any
   individual firm or small set of firms. The estimates do not depend on
   these lines. */
generate esamp=1 if e(sample)
preserve
	keep if esamp==1
	/* sample used in tables1-7.do to generate summary statistics */
	save `esampfolder'/table2, replace
restore
drop esamp
xtpoisson absorb_ratio lmileage2_mr firmfrac_mr, fe robust
xtpoisson absorb_ratio lmileage2 firmfrac, fe robust

/******************************************************
	Table 3: Columns 2 and 3, then column 1
******************************************************/
/* Same data as above; the panel declaration and sample restrictions are
   simply re-applied. */
xtset lbdnum
keep if mu==1 & same_zip!=1

/* Centre log distance at its sample mean, then interact it with firmfrac. */
summarize lmileage2, meanonly
generate lmileage2_c=lmileage2-r(mean)
generate distinter=lmileage2_c*firmfrac

/* Columns 2 and 3: Poisson with two sets of fixed effects (lbdnum and dest_zip). */
poi2hdfe absorb_ratio lmileage2  firmfrac, id1(lbdnum) id2(dest_zip) robust
generate esamp=1 if e(sample)
preserve
	keep if esamp==1
	keep firmid dest_zip lbdnum
	save `esampfolder'/table3, replace
restore
drop esamp
poi2hdfe absorb_ratio lmileage2_c  firmfrac distinter, id1(lbdnum) id2(dest_zip) robust

/* Column 1: reload only the variables that are needed. The interaction
   terms are built before the sample restrictions are applied. */
use firmid firmfrac_mr firmfrac lmileage2 naicsup dest_zip orig_zip mu same_zip absorb_ratio lmileage2_mr lbdnum using analysis_data2007, clear

/* Version A: product of the centred, already-adjusted (_mr) distance with
   the adjusted firm share. */
summarize lmileage2_mr, meanonly
generate lmileage2_mr_c = lmileage2_mr - r(mean)
generate inter_mr=lmileage2_mr_c*firmfrac_mr

/* Version B: the raw interaction, minus its means by (dest_zip, naicsup)
   and by (orig_zip, naicsup), plus its mean by naicsup. */
generate inter=lmileage2*firmfrac
egen mean1 = mean(inter), by(dest_zip naicsup)
egen mean2 = mean(inter), by(orig_zip naicsup)
egen mean3 = mean(inter), by(naicsup)
generate inter_mr2=inter-mean1-mean2+mean3

xtset lbdnum
keep if mu==1 & same_zip!=1
xtpoisson absorb_ratio  lmileage2_mr_c firmfrac_mr inter_mr, fe robust
xtpoisson absorb_ratio  lmileage2_mr firmfrac_mr inter_mr2, fe robust

}

if `table5'==1 {

 /******************************************************
	Table 5
*******************************************************/

/* Overview of this block:
     1. build rauch_naics: Rauch classifications (con, lib) by six-digit industry;
     2. build cap_intensity_naics: above/below-median capital, IT and
        e-commerce intensity by industry, from the 2007 Census of Manufacturers;
     3. build v2w_naics: above/below-median value-to-weight ratio by industry;
     4. merge the three onto the shipment data, interact each industry
        characteristic with distance and firm share, and run the regressions. */

/* The file rauch_classification_naics can be found on Jon Haveman's 
   International Trade Data website. 
   See https://www.macalester.edu/research/economics/PAGE/HAVEMAN/Trade.Resources/TradeData.html */
   
   
use rauch_classification_naics, clear
/* Compute the modal Rauch classification by four or five-digit naics. We 
   will use these modal classifications to fill in missing values for particular
   six-digit naics codes */
gen naics5=substr(naics,1,5)
gen naics4=substr(naics,1,4)
bys naics: egen lib_m=mode(lib) , minmode
bys naics4: egen lib4=mode(lib) , minmode
bys naics5: egen lib5=mode(lib) , minmode
bys naics: egen con_m=mode(con) , minmode
bys naics4: egen con4=mode(con) , minmode
bys naics5: egen con5=mode(con) , minmode
/* Keep one row per naics code. */
sort naics naics5 con5 con4 lib5 lib4
by naics: keep if _n==1
keep naics* con* lib*
drop con lib
ren con_m con
ren lib_m lib
/* Fill gaps with the five-digit mode.
   NOTE(review): the four-digit modes (con4, lib4) are computed above but are
   never used as a fallback; confirm that this is intended. */
replace con=con5 if con==""
replace lib=lib5 if lib==""

/* Fill in naics codes which have XX in the 
suffix. Assume that the Rauch codes are the same within a group. */
expand 100 if substr(naics, 5,2)=="XX"
bys naics: gen x=_n
replace x=x-1
tostring x, replace
replace x="0"+x if length(x)==1
replace naics=substr(naics,1,4)+x if substr(naics, 5,2)=="XX"
/* NOTE(review): naics was rewritten on the previous line, so the condition
   below no longer matches any row. naics5 is dropped further down, so the
   saved file is unaffected. */
replace naics5=substr(naics,1,4)+substr(x,1,1) if substr(naics, 5,2)=="XX"
drop x
/* Same expansion for codes whose sixth character is X. */
expand 10 if substr(naics, 6,1)=="X"
bys naics: gen x=_n
replace x=x-1
tostring x, replace
replace naics=substr(naics,1,5)+x if substr(naics, 6,1)=="X"
drop x
drop if naics==""
destring naics, replace
drop naics5 naics4 lib4 lib5 con4 con5
ren naics naicsup
save rauch_naics, replace

/* This is the 2007 Census of Manufacturers */
use cmf2007, clear
/* Keep, for each lbdnum, the record with the largest tvs. */
gsort lbdnum -tvs
by lbdnum: keep if _n==1
keep lbdnum tvs tce naics_new ecom_sales cmec cexso cprte cdapr
merge 1:1 lbdnum using lbd2007
drop if _merge~=3
drop _merge
drop naics
gen naics=substr(bestnaics,1,6)
/* Industry totals, then intensities as ratios to tvs. */
collapse (sum) tvs tce ecom_sales cmec cexso cprte cdapr , by(naics)
gen ecom_intensity=ecom_sales/tvs
gen it_intensity=(cmec+cexso+cprte+cdapr)/tvs
gen cap_intensity=tce/tvs
keep cap_intensity naics tvs it_intensity ecom_intensity
/* There may be observations which have naics codes outside of our above range with nonmissing 
   values for variables which are measured for manufacturing plants. For these establishments, 
   drop the values for these variables. Actual values redacted.  */
   
 /* Atalay et al.: The three 2-digit codes that were redacted by the Census staff correspond to 
    the three 2-digit industries for which these variables are measured */ 
replace cap_intensity=. if substr(naics,1,2)~="XX" &  substr(naics,1,2)~="XX" & substr(naics,1,2)~="XX"
replace ecom_intensity=. if substr(naics,1,2)~="XX" &  substr(naics,1,2)~="XX" & substr(naics,1,2)~="XX"
replace it_intensity=. if substr(naics,1,2)~="XX" &  substr(naics,1,2)~="XX" & substr(naics,1,2)~="XX"
/* Above-median indicators. Stata treats missing as larger than any number,
   so the indicators are reset to missing afterwards wherever the underlying
   intensity is missing. */
sum ecom_intensity, de
gen ecomint = (ecom_intensity > r(p50))
sum cap_intensity, de
gen cint = (cap_intensity > r(p50))
sum it_intensity, de
gen itint = (it_intensity > r(p50))
replace cint=. if cap_intensity==.
replace itint=. if it_intensity==.
replace ecomint=. if ecom_intensity==.
keep naics cint cap_intensity itint it_intensity ecom_intensity ecomint
ren naics naicsup
drop if itint==.
/* Use the new name explicitly rather than relying on abbreviation of naics. */
destring naicsup, replace
save cap_intensity_naics, replace
/* This dataset will describe which industries have below or above average 
  capital intensity, IT capital intensity, and ecommerce intensity */

/* Value-to-weight ratio by industry, and an above-median indicator. */
use cfs2007cleaned, clear
drop naics
gen naics=substr(bestnaics,1,6)
collapse (sum) vs weight , by(naics)
gen val_2_wei=vs/weight
sum val_2_wei, de
gen v2w=(val_2_wei>r(p50)) 
replace v2w=. if val_2_wei==.
keep v2w val_2_wei naics
ren naics naicsup
drop if naicsup==""
destring naicsup, replace
save v2w_naics, replace

/* Shipment data. The three using files store their key as naicsup, so the
   merges on naics below rely on naics being a unique abbreviation of naicsup
   in the master data. */
use wholesale firmid lbdnum dest_zip naics lmileage2* firmfrac* orig_zip absorb_ratio mu same_zip num_est using analysis_data2007_panel, clear
merge n:1 naics using v2w_naics, keep(1 3)
drop _merge
merge n:1 naics using cap_intensity_naics, keep(1 3)
drop _merge
merge n:1 naics using rauch_naics, keep(1 3)
drop _merge

/* Indicators for each Rauch category (r, n, w) under the con and lib
   classifications; they are missing when the classification is missing. */
gen prodC_r = 0 if con~=""
replace prodC_r = 1 if con=="r"
gen prodC_n = 0 if con~=""
replace prodC_n = 1 if con=="n"
gen prodC_w = 0 if con~=""
replace prodC_w = 1 if con=="w"

gen prodL_r = 0 if lib~=""
replace prodL_r = 1 if lib=="r"
gen prodL_n = 0 if lib~=""
replace prodL_n = 1 if lib=="n"
gen prodL_w = 0 if lib~=""
replace prodL_w = 1 if lib=="w"

gen inv_num_est=(1+num_est)^(-1)

/* Distance x firm-share interaction, minus its means by (dest_zip, naicsup)
   and (orig_zip, naicsup), plus its mean by naicsup. */
gen inter=lmileage2*firmfrac
egen mean1 = mean(inter) , by(dest_zip naicsup)
egen mean2 = mean(inter) , by(orig_zip naicsup)
egen mean3 = mean(inter) , by(naicsup)
gen inter_mr=inter-mean1-mean2+mean3
drop mean1 mean2 mean3
 
 /* For every industry characteristic, build its interactions with distance
    (v1), firm share (v2) and distance x firm share (v3), each adjusted in the
    same way as inter_mr above. */
 foreach v in wholesale itint ecomint cint v2w   prodL_n prodL_w prodL_r  prodC_n prodC_w prodC_r  {
   gen v1=`v'*lmileage2
   gen v2=`v'*firmfrac
   gen v3=`v'*lmileage2*firmfrac

   egen mean1 = mean(v1) , by(dest_zip naicsup)
   egen mean2 = mean(v1) , by(orig_zip naicsup)  
   egen mean3 = mean(v1) , by(naicsup)
   
   gen `v'_dist_mr=v1-mean1-mean2+mean3
   drop mean1 mean2 mean3

   egen mean1 = mean(v2) , by(dest_zip naicsup)
   egen mean2 = mean(v2) , by(orig_zip naicsup)  
   egen mean3 = mean(v2) , by(naicsup)
   gen `v'_share_mr=v2-mean1-mean2+mean3
   
   drop mean1 mean2 mean3
   egen mean1 = mean(v3) , by(dest_zip naicsup)
   egen mean2 = mean(v3) , by(orig_zip naicsup)  
   egen mean3 = mean(v3) , by(naicsup)
   gen `v'_dist_share_mr=v3-mean1-mean2+mean3
   drop mean1 mean2 mean3 v1 v2 v3
}
 
keep if mu==1 & same_zip~=1
xtset lbdnum 

/* Two regressions per characteristic: without and with the triple
   interaction. The esamp flags mark the estimation sample of the second
   regression in a group, for the disclosure-review file saved at the end. */
xtpoisson absorb_ratio lmileage2_mr firmfrac_mr wholesale_dist_mr wholesale_share_mr, fe robust
xtpoisson absorb_ratio lmileage2_mr firmfrac_mr wholesale_dist_mr wholesale_share_mr inter_mr wholesale_dist_share_mr, fe robust
gen esampW=1 if e(sample)

xtpoisson absorb_ratio lmileage2_mr firmfrac_mr itint_dist_mr itint_share_mr, fe robust
xtpoisson absorb_ratio lmileage2_mr firmfrac_mr itint_dist_mr itint_share_mr inter_mr itint_dist_share_mr, fe robust

xtpoisson absorb_ratio lmileage2_mr firmfrac_mr ecomint_dist_mr ecomint_share_mr, fe robust
xtpoisson absorb_ratio lmileage2_mr firmfrac_mr ecomint_dist_mr ecomint_share_mr inter_mr ecomint_dist_share_mr, fe robust 

xtpoisson absorb_ratio lmileage2_mr firmfrac_mr cint_dist_mr cint_share_mr, fe robust
xtpoisson absorb_ratio lmileage2_mr firmfrac_mr cint_dist_mr cint_share_mr inter_mr cint_dist_share_mr, fe robust
gen esampMF=1 if e(sample)

xtpoisson absorb_ratio lmileage2_mr firmfrac_mr v2w_dist_mr v2w_share_mr, fe robust
xtpoisson absorb_ratio lmileage2_mr firmfrac_mr v2w_dist_mr v2w_share_mr inter_mr v2w_dist_share_mr, fe robust
gen esampV=1 if e(sample)

xtpoisson absorb_ratio lmileage2_mr firmfrac_mr prodL_n_dist_mr prodL_w_dist_mr  prodL_n_share_mr prodL_w_share_mr  , fe robust
xtpoisson absorb_ratio lmileage2_mr firmfrac_mr prodL_n_dist_mr prodL_w_dist_mr  prodL_n_share_mr prodL_w_share_mr inter_mr prodL_n_dist_share_mr prodL_w_dist_share_mr  , fe robust
gen esampR1=1 if e(sample)

xtpoisson absorb_ratio lmileage2_mr firmfrac_mr prodC_n_dist_mr prodC_w_dist_mr  prodC_n_share_mr prodC_w_share_mr , fe robust
xtpoisson absorb_ratio lmileage2_mr firmfrac_mr prodC_n_dist_mr prodC_w_dist_mr  prodC_n_share_mr prodC_w_share_mr inter_mr prodC_n_dist_share_mr prodC_w_dist_share_mr , fe robust
gen esampR2=1 if e(sample)

/* Disclosure-review file for this table. */
keep esamp* lbdnum dest_zip firmid  wholesale itint ecomint cint v2w  prodL_n prodL_w prodL_r  prodC_n prodC_w prodC_r 
save `esampfolder'/esamp_table6, replace

}


/*******************************************************
	Figure 2

NAICS values redacted by Census Staff.
Atalay et al.: The values that are redacted correspond, in order, to how they appear 
    in Figure 2 of the paper. 
*******************************************************/

if `figure2'==1 {
  
/* Figure 2: the baseline fixed-effects Poisson regression, run separately
   for each of 19 industry groups. */
use firmid mu same_zip naicsup lbdnum  lmileage2 firmfrac absorb_ratio dest_zip orig_zip using  analysis_data2007, clear
xtset lbdnum
/* Assign each observation to a group from the leading digits of naicsup
   (floor(naicsup/1e3) for groups 1-18, floor(naicsup/1e4) for group 19).
   The XXX / XX values are redaction placeholders, so this block cannot run
   as written. */
gen group=.
replace group=1 if floor(naicsup/1e3)==XXX
replace group=2 if floor(naicsup/1e3)==XXX | floor(naicsup/1e3)==XXX
replace group=3 if floor(naicsup/1e3)==XXX | floor(naicsup/1e3)==XXX |  floor(naicsup/1e3)==XXX | floor(naicsup/1e3)==XXX
replace group=4 if floor(naicsup/1e3)==XXX
replace group=5 if floor(naicsup/1e3)==XXX
replace group=6 if floor(naicsup/1e3)==XXX
replace group=7 if floor(naicsup/1e3)==XXX
replace group=8 if floor(naicsup/1e3)==XXX
replace group=9 if floor(naicsup/1e3)==XXX
replace group=10 if floor(naicsup/1e3)==XXX
replace group=11 if floor(naicsup/1e3)==XXX
replace group=12 if floor(naicsup/1e3)==XXX
replace group=13 if floor(naicsup/1e3)==XXX
replace group=14 if floor(naicsup/1e3)==XXX
replace group=15 if floor(naicsup/1e3)==XXX
replace group=16 if floor(naicsup/1e3)==XXX
replace group=17 if floor(naicsup/1e3)==XXX
replace group=18 if floor(naicsup/1e3)==XXX
replace group=19 if floor(naicsup/1e4)==XX 

/* Group-specific adjusted regressors: each variable minus its means by
   (dest_zip, industry, group) and (orig_zip, industry, group), plus its mean
   by (industry, group). The industry variable is the one named by the
   indvar macro. */
foreach v in  lmileage2 firmfrac {
        di "`v'"
	egen mean1 = mean(`v'), by(dest_zip `indvar' group)
	egen mean2 = mean(`v'), by(orig_zip `indvar' group)
	egen mean3 = mean(`v'), by(`indvar' group) 
	gen `v'_mrG=`v'-mean1-mean2+mean3
	drop mean1 mean2 mean3
}
keep if mu==1 & same_zip~=1
/* One regression per group. esamp records the group number for every
   observation that enters that group's estimation sample. */
xtpoisson absorb_ratio lmileage2_mrG firmfrac_mrG if group==1, fe robust
gen esamp=1 if e(sample)
forvalues idx=2/19 {
  xtpoisson absorb_ratio lmileage2_mrG firmfrac_mrG if group==`idx', fe robust
  replace esamp=`idx' if e(sample)
 }

/* Keep only observations used in some regression and save them for the
   disclosure-review program. */
keep if esamp~=.
save `esampfolder'/figure2, replace

}


if `table716'==1 {

/* Tables 7 and 16. The sample is restricted to the observations saved in
   the table3 file, which the table23 block writes to the esampfolder
   directory. */
use mu orig_zip naicsup shipment dest_zip lmileage2_mr firmfrac_mr same_zip im1frac_mr im2frac_mr im3frac_mr  lbdnum absorb_ratio  using analysis_data2007, clear
drop if mu!=1
drop if same_zip==1

merge n:1 lbdnum dest_zip using  `esampfolder'/table3
keep if _merge==3

/* This material is in Table 7 */

/* For each of the three im?frac_mr variables: regress firmfrac_mr on it and
   on distance with lbdnum fixed effects, test its coefficient, store the
   residual from that regression (predict option e), and add the residual as
   a regressor in the fixed-effects Poisson model. */
foreach v in im1frac_mr im2frac_mr im3frac_mr {
	xtset lbdnum
	xtreg firmfrac_mr `v' lmileage2_mr, fe 
	test `v'
	predict errort`v', e
	xtpoisson absorb_ratio lmileage2_mr firmfrac_mr errort`v', fe robust
}

/* This material is in Table 16 */

/* Deviations from the sample mean of each variable. */
foreach v in  firmfrac_mr lmileage2_mr  im1frac_mr im2frac_mr im3frac_mr  {
	sum `v', meanonly
	gen `v'dev=`v'-r(mean)
}

/* GMM estimates using the moment-evaluator program gmm_poiend, which is not
   defined in this block. Each command spans three lines; a multi-line
   command needs the three-slash continuation marker, since a two-slash
   comment ends the command at that line. */
gmm gmm_poiend, nequations(1) vce(cluster lbdnum) onestep ///
	parameters(absorb_ratio:lmileage2_mrdev absorb_ratio:firmfrac_mrdev ) ///
	 instruments(lmileage2_mrdev im3frac_mrdev , noconstant) 
	 
gmm gmm_poiend, nequations(1) vce(cluster lbdnum) onestep ///
	parameters(absorb_ratio:lmileage2_mrdev absorb_ratio:firmfrac_mrdev ) ///
	 instruments(lmileage2_mrdev im2frac_mrdev , noconstant) 

/* NOTE(review): the parameter names here omit the dev suffix used in the two
   calls above. Whether that matters depends on how gmm_poiend uses the names;
   confirm against its definition. */
gmm gmm_poiend, nequations(1) vce(cluster lbdnum) onestep ///
	parameters(absorb_ratio:lmileage2_mr absorb_ratio:firmfrac_mr ) ///
	 instruments(lmileage2_mrdev im1frac_mrdev , noconstant)

}  

* Table 1: Summary Statistics
if `table1'==1 {
  
/******************************************************
	Table 1
******************************************************/

/* Summary statistics on the sample saved as table2 by the table23 block.
   Throughout, a percentile X is reported as the average of percentiles X-1
   and X+1 (a disclosure-avoidance requirement, see the note in Panels B-C),
   and a companion flag named <variable>_<X>samp marks the observations whose
   value equals one of those two percentiles. */
use `esampfolder'/table2, clear

*******************************************************
* Panel A

/* Shares of observations with absorb_ratio equal to 0 and to 1. */
count if absorb_ratio==.
count if absorb_ratio~=. & absorb_ratio==1
local c1=r(N)
count if absorb_ratio~=. 
local t=r(N)
count if absorb_ratio~=. & absorb_ratio==0
local c0=r(N)

dis `c0'/`t'
dis `c1'/`t'

sum absorb_ratio, de
sum shipment, de
sum tot_ship_d, de

foreach pct in 10 25 50 75 90{
	local p1=`pct'+1
	local p2=`pct'-1
	
	egen shipment_p1=pctile(shipment), p(`p1')
	egen shipment_p2=pctile(shipment), p(`p2')
	
	gen shipment_`pct'=(shipment_p1+shipment_p2)/2
	gen shipment_`pct'samp=1 if shipment==shipment_p1 | shipment==shipment_p2
	
	egen tot_ship_p1=pctile(tot_ship_d), p(`p1')
	egen tot_ship_p2=pctile(tot_ship_d), p(`p2')
	gen tot_ship_`pct'=(tot_ship_p1+tot_ship_p2)/2
	gen tot_ship_`pct'samp=1 if tot_ship_d==tot_ship_p1 | tot_ship_d==tot_ship_p2
	
	egen absorb_ratio_p1=pctile(absorb_ratio), p(`p1')
	egen absorb_ratio_p2=pctile(absorb_ratio), p(`p2')
	gen absorb_ratio_`pct'=(absorb_ratio_p1+absorb_ratio_p2)/2
	/* Flag named like the shipment and tot_ship flags above. */
	gen absorb_ratio_`pct'samp=1 if absorb_ratio==absorb_ratio_p1 | absorb_ratio==absorb_ratio_p2
	
	drop shipment_p1 shipment_p2 tot_ship_p1 tot_ship_p2 absorb_ratio_p1 absorb_ratio_p2
}

foreach pct in 10 25 50 75 90{
	sum shipment_`pct'
	sum tot_ship_`pct'
	sum absorb_ratio_`pct'
}
**********************************************************
* Panels B-C

/* inv_num_est is computed before missing num_est values are set to zero
   below, so it is missing wherever num_est is missing. */
gen inv_num_est=1/(num_est+1)
sum inv_num_est if absorb_ratio>0 & absorb_ratio<.
sum inv_num_est if absorb_ratio==0

foreach var in firmtotest firmnumest num_est firmfrac{
	replace `var'=0 if `var'==.
}
sum firmtotest, de
sum firmnumest, de
sum num_est, de
sum firmfrac, de

count if firmnumest>1 & firmnumest!=.
count if firmnumest==1 & firmnumest!=.
count if firmnumest!=.
tab firmnumest

/* First subsample: observations with a positive shipment. */
preserve
drop if shipment==0 | shipment==.
sum firmtotest, de
sum firmnumest, de
sum num_est, de
sum firmfrac, de

count if firmfrac==.
count if firmfrac~=. & firmfrac==1
local c1=r(N)
count if firmfrac~=. 
local t=r(N)
count if firmfrac~=. & firmfrac==0
local c0=r(N)
dis `c0'/`t'
dis `c1'/`t'

/* Census disclosure avoidance rules prohibit us from actually reporting the Xth percentile.
   Instead we report the average of the (X-1)th and (X+1)th percentile */

foreach pct in 10 25 50 75 90{
	foreach var in firmtotest firmnumest num_est firmfrac{
	replace `var'=0 if `var'==.
	local p1=`pct'+1
	local p2=`pct'-1
	egen `var'_p1=pctile(`var'), p(`p1')
	egen `var'_p2=pctile(`var'), p(`p2')
	gen `var'_`pct'=(`var'_p1+`var'_p2)/2
	gen `var'_`pct'samp=1 if `var'==`var'_p1 | `var'==`var'_p2
	drop `var'_p1 `var'_p2
	}
}
foreach pct in 10 25 50 75 90{
	foreach var in firmtotest firmnumest num_est firmfrac{
	sum `var'_`pct'
	}
}
restore

/* Second subsample: observations with a zero or missing shipment. */
preserve
keep if shipment==0 | shipment==.

sum firmtotest, de
sum firmnumest, de
sum num_est, de
sum firmfrac, de

count if firmfrac==.
count if firmfrac~=. & firmfrac==1
local c1=r(N)
count if firmfrac~=. 
local t=r(N)
count if firmfrac~=. & firmfrac==0
local c0=r(N)
dis `c0'/`t'
dis `c1'/`t'

foreach pct in 10 25 50 75 90{
	foreach var in firmtotest firmnumest num_est firmfrac{
	replace `var'=0 if `var'==.
	local p1=`pct'+1
	local p2=`pct'-1
	egen `var'_p1=pctile(`var'), p(`p1')
	egen `var'_p2=pctile(`var'), p(`p2')
	gen `var'_`pct'=(`var'_p1+`var'_p2)/2
	gen `var'_`pct'samp=1 if `var'==`var'_p1 | `var'==`var'_p2
	drop `var'_p1 `var'_p2
	}
}
foreach pct in 10 25 50 75 90{
	foreach var in firmtotest firmnumest num_est firmfrac{
	sum `var'_`pct'
	}
}
restore

**********************************************************
* Panel D
foreach var in firmnumest{
	replace `var'=0 if `var'==.
}
count if firmnumest>1 & firmnumest!=.
count if firmnumest==1 & firmnumest!=.
count if firmnumest!=.
tab firmnumest

/* Distance percentiles for observations with firmnumest equal to zero. */
preserve
drop if firmnumest>=1 & firmnumest!=.
sum lmileage2, de

foreach pct in 10 25 50 75 90{
	foreach var in lmileage2{
	local p1=`pct'+1
	local p2=`pct'-1
	egen `var'_p1=pctile(`var'), p(`p1')
	egen `var'_p2=pctile(`var'), p(`p2')
	gen `var'_`pct'=(`var'_p1+`var'_p2)/2
	gen `var'_`pct'samp=1 if `var'==`var'_p1 | `var'==`var'_p2
	drop `var'_p1 `var'_p2
	}
}
foreach pct in 10 25 50 75 90{
	foreach var in lmileage2{
	sum `var'_`pct'
	}
}
restore

/* Distance percentiles for observations with firmnumest different from zero. */
preserve
drop if firmnumest==0 & firmnumest!=.
sum lmileage2, de

foreach pct in 10 25 50 75 90{
	foreach var in lmileage2{
	local p1=`pct'+1
	local p2=`pct'-1
	egen `var'_p1=pctile(`var'), p(`p1')
	egen `var'_p2=pctile(`var'), p(`p2')
	gen `var'_`pct'=(`var'_p1+`var'_p2)/2
	gen `var'_`pct'samp=1 if `var'==`var'_p1 | `var'==`var'_p2
	drop `var'_p1 `var'_p2
	}
}
foreach pct in 10 25 50 75 90{
	foreach var in lmileage2 {
	sum `var'_`pct'
	}
}
restore

}

/******************************************************
	Table 10
******************************************************/

/* This is the table which examines whether our coefficient estimates are sensitive to the
  definition of vertical links (using more restrictive definition of two industries being
  linked to one another ) */
  
if `table10'==1 {

/* Re-estimate the baseline fixed-effects Poisson regression on the datasets
   built with the alternative industry-link cutoffs (suffixes 20 and 30). For
   each one, summarize 1/(1+num_est) over the estimation sample and save the
   sample identifiers for the disclosure-review program. */
foreach cut in 20 30 {
	use firmid dest_zip lbdnum mu same_zip absorb_ratio lmileage2_mr firmfrac_mr num_est using analysis_data2007_`cut', clear
	xtset lbdnum
	keep if mu==1 & same_zip!=1
	xtpoisson absorb_ratio lmileage2_mr firmfrac_mr, fe robust
	generate esamp13_`cut'=1 if e(sample)
	generate inv_num_est=1/(1+num_est)
	summarize inv_num_est if esamp13_`cut'==1
	keep esamp* lbdnum dest_zip firmid
	save `esampfolder'/table13_`cut', replace
}

}

/******************************************************
	Table 9: Columns 2-4
******************************************************/

/* This is the table which examines whether our coefficient estimates are sensitive to the
  size of the firm of the sending establishment. Does the firm have >=5 establishments, 10
  establishments or 20 establishments? */
  
if `table9'==1 {

/* Step 1: firm size, measured as the number of records per firmid in 2007.
   The result is saved as the firmsize dataset, keyed on a numeric firmid. */
use lbd20062007naics, clear
drop if lbdnum==""
keep lbdnum firmid zip yr pay
generate firmsize=1
collapse (sum) firmsize, by(firmid yr)
keep if yr==2007 & firmid!=""
drop yr
destring firmid, replace force
save firmsize, replace

/* Step 2: baseline regression on firms with at least 5, 10 and 20
   establishments. The data are first cut to firmsize>=5, so the first pass
   of the loop uses everything that remains. */
use firmid dest_zip lbdnum mu same_zip absorb_ratio lmileage2_mr firmfrac_mr using analysis_data2007, clear
xtset lbdnum
keep if mu==1 & same_zip!=1
merge n:1 firmid using firmsize, keep(match) nogenerate
keep if firmsize>=5
foreach minsize in 5 10 20 {
	xtpoisson absorb_ratio lmileage2_mr firmfrac_mr if firmsize>=`minsize', fe robust
	generate esamp12_`minsize'=1 if e(sample)
}

/* Sample identifiers for the disclosure-review program. */
keep esamp* lbdnum dest_zip firmid
save `esampfolder'/table12, replace

}

if `table_panel'==1 {
/* Regressions that add the 2002 absorption ratio as a regressor. */
use panel_both0207, clear
	/* Missing 2002 shipment values are treated as zero. shipment2002 keeps
	   the zeros; vs2002 itself then has its zeros replaced by one. */
	replace vs2002=0 if vs2002==.
	gen shipment2002=vs2002
	replace vs2002=1 if vs2002==0 
	/* 2002 shipment totals by origin zip x industry, destination zip x
	   industry, and industry. */
	bys orig_zip `indvar': egen tot_ship_o1=sum(shipment2002)
	bys dest_zip `indvar': egen tot_ship_d1=sum(shipment2002)
	bys `indvar': egen tot_ship1=sum(shipment2002)

	gen theta_i1 = tot_ship_o1/tot_ship1
	gen theta_d1 = tot_ship_d1/tot_ship1
	gen absorb_ratio_20021=shipment2002/tot_ship_d1
	
	/* The same totals for the variable shipment. */
	bys orig_zip `indvar': egen tot_ship_o2=sum(shipment)
	bys dest_zip `indvar': egen tot_ship_d2=sum(shipment)
	bys `indvar': egen tot_ship2=sum(shipment)
	gen absorb_ratio_2007=shipment/tot_ship_d2
	
	replace absorb_ratio_2002=0 if absorb_ratio_2002==.
	replace absorb_ratio_20021=0 if absorb_ratio_20021==.
	replace absorb_ratio_2007=0 if absorb_ratio_2007==.
	
/* Weighted adjustment (suffix _mrW) of the 2002 absorption ratio.
   NOTE(review): the weights are theta_i and theta_d as stored in the dataset,
   not the theta_i1 and theta_d1 computed above; confirm that this is intended. */
foreach v in  absorb_ratio_2002{
        di "`v'"
        egen MRDIS1`v' = sum(theta_i*`v'), by(dest_zip `indvar')
	egen MRDIS2`v' = sum(theta_d*`v'), by(orig_zip `indvar')
	egen MRDIS3`v' = sum(theta_i*theta_d*`v'), by(`indvar')
	gen `v'_mrW=`v'-(MRDIS1`v'+MRDIS2`v'-MRDIS3`v')
	drop MR* 
}
/* Unweighted adjustment (suffix _mr): subtract the means by (dest_zip,
   industry) and (orig_zip, industry), add back the mean by industry. */
foreach v in  absorb_ratio_2002{
        di "`v'"
	egen mean1 = mean(`v'), by(dest_zip `indvar')
	egen mean2 = mean(`v'), by(orig_zip `indvar')
	egen mean3 = mean(`v'), by(`indvar')
	gen `v'_mr=`v'-mean1-mean2+mean3
	drop mean1 mean2 mean3
}

xtset lbdnum
drop if mu!=1
drop if same_zip==1
/* Snapshot of the estimation data. It is restored at the end of this block,
   so that no preserved copy is left pending for later blocks. */
preserve
foreach i in 2002{
dis "THIS IS `i'"
xtpoisson absorb_ratio absorb_ratio_`i'_mrW lmileageA_mrW lmileageB_mrW lmileageC_mrW lmileageD_mrW lmileageE_mrW firmfrac_mrW, fe robust
xtpoisson absorb_ratio absorb_ratio_`i'_mr lmileageA_mr lmileageB_mr lmileageC_mr lmileageD_mr lmileageE_mr firmfrac_mr, fe robust
xtpoisson absorb_ratio absorb_ratio_`i' lmileageA-lmileageE firmfrac, fe robust
xtpoisson absorb_ratio absorb_ratio_`i'_mrW lmileage2_mrW firmfrac_mrW, fe robust
xtpoisson absorb_ratio absorb_ratio_`i'_mr lmileage2_mr firmfrac_mr, fe robust
xtpoisson absorb_ratio absorb_ratio_`i' lmileage2 firmfrac, fe robust

/* For each im?frac_mr variable: fixed-effects regression of firmfrac_mr on
   it, distance and the 2002 absorption ratio; test its coefficient; store
   the residual (predict option e); and add the residual to the Poisson
   regression. */
foreach v in im1frac_mr im2frac_mr im3frac_mr {
	xtset lbdnum
	xtreg firmfrac_mr `v' lmileage2_mr absorb_ratio_`i'_mr, fe 
	test `v'
	predict errort`v'_`i', e
	dis "`v'"
	xtpoisson absorb_ratio absorb_ratio_`i'_mr lmileage2_mr firmfrac_mr errort`v'_`i', fe robust
}
}
restore

}


if `ten_mile'==1 {
/* This portion of the code corresponds to columns 4-6 of Table 2 of the paper.
   It replaces log distance with distance-bin indicators, adding a separate
   bin for exp(lmileage2) of at most 10. */
use pay firmid emp  theta_i theta_d naicsup orig_zip lmileageA* lmileageB* lmileageC* lmileageD* lmileageE*  lbdnum dest_zip lmileage2 lmileage2_mrW lmileage2_mr firmfrac firmfrac_mr firmfrac_mrW same_zip absorb_ratio dest_zip mu  using analysis_data2007, clear
xtset lbdnum
/* lmileageZ: exp(lmileage2) at most 10. lmileageA is redefined to cover
   exp(lmileage2) above 10 and at most 50. */
gen lmileageZ=(exp(lmileage2)<=10 )
replace lmileageA=(exp(lmileage2)>10 & exp(lmileage2)<=50 )

/* The stored adjusted versions of lmileageA refer to its old definition, so
   they are dropped and rebuilt, together with those of lmileageZ. */
drop lmileageA_mr lmileageA_mrW
foreach v of varlist lmileageZ lmileageA {
	/* Unweighted adjustment (suffix _mr). */
	egen mean1 = mean(`v'), by(dest_zip `indvar')
	egen mean2 = mean(`v'), by(orig_zip `indvar')
	egen mean3 = mean(`v'), by(`indvar')
	gen `v'_mr=`v'-mean1-mean2+mean3
	drop mean1 mean2 mean3
	/* Weighted adjustment (suffix _mrW), using theta_i and theta_d. */
        egen MRDIS1`v' = sum(theta_i*`v'), by(dest_zip `indvar')
	egen MRDIS2`v' = sum(theta_d*`v'), by(orig_zip `indvar')
	egen MRDIS3`v' = sum(theta_i*theta_d*`v'), by(`indvar')
	gen `v'_mrW=`v'-(MRDIS1`v'+MRDIS2`v'-MRDIS3`v')
	drop MR*
}
drop if mu==0
drop if same_zip==1

/* Restrict to the observations saved in the table3 file. The path uses the
   same separator as the save command that writes that file. */
merge 1:1 lbdnum dest_zip using  `esampfolder'/table3, keepusing()
keep if _merge==3

xtpoisson absorb_ratio lmileageZ_mrW lmileageA_mrW lmileageB_mrW lmileageC_mrW  lmileageD_mrW lmileageE_mrW firmfrac_mrW, fe robust
xtpoisson absorb_ratio lmileageZ_mr lmileageA_mr lmileageB_mr lmileageC_mr  lmileageD_mr lmileageE_mr firmfrac_mr, fe robust
xtpoisson absorb_ratio lmileageZ lmileageA lmileageB lmileageD lmileageC  lmileageE firmfrac, fe robust

/* Output for the disclosure-review program.
   NOTE(review): this path has no separator after the folder macro, unlike
   the save commands in the earlier blocks; confirm how the macro is defined. */
keep lbdnum dest_zip firmid  emp pay  lmileageZ lmileageA lmileageB lmileageC lmileageD lmileageE
compress
save `esampfolder'ten_mile.dta, replace

}

if `figure_1'==1 {
/* This portion of the code corresponds to Figure 1 of the paper. It computes
   the mean absorption ratio within distance groups, separately for
   observations with and without a positive firmfrac. */
use naicsup firmid pay emp mu same_zip absorb_ratio lbdnum dest_zip firmfrac lmileage2 if mu==1 & same_zip==0 using  analysis_data2007, clear
/* Restrict to the observations saved in the table3 file. The path uses the
   same separator as the save command that writes that file. */
merge 1:1 lbdnum dest_zip using  `esampfolder'/table3, keepusing()
keep if _merge==3
/* Ten distance groups: group starts at 1 and increases by one for every
   threshold that exp(lmileage2) exceeds. */
gen group=1
replace group=group+1 if exp(lmileage2)>10
replace group=group+1 if exp(lmileage2)>50
replace group=group+1 if exp(lmileage2)>100
replace group=group+1 if exp(lmileage2)>150
replace group=group+1 if exp(lmileage2)>200
replace group=group+1 if exp(lmileage2)>300
replace group=group+1 if exp(lmileage2)>500
replace group=group+1 if exp(lmileage2)>1000
replace group=group+1 if exp(lmileage2)>2000

/* Mean log distance by group. */
bys group: egen lmileage2_group=mean(lmileage2)
/* Indicator for a positive firmfrac; missing when firmfrac is missing. */
gen pos_firm_frac=(firmfrac>0) if firmfrac<.
/* group_fi marks one observation per (group, pos_firm_frac) cell. */
bys group pos_firm_frac: gen group_fi=1 if _n==1  
bys group pos_firm_frac: egen absorb_ratio_group=mean(absorb_ratio)
/* NOTE(review): this path has no separator after the folder macro, unlike
   the save commands in the earlier blocks; confirm how the macro is defined. */
save `esampfolder'figure_1_analogue.dta, replace

}

if `alternate_cluster'==1 {
/* This portion of the code corresponds to Table 12 of the paper. It runs the
   same linear fixed-effects regression with standard errors clustered at
   several alternative levels. */
use firmid pay emp naicsup mu same_zip lbdnum dest_zip absorb_ratio lmileage2_mr firmfrac_mr using analysis_data2007, clear
/* Attach destination state and county to each destination zip code. */
merge n:1 dest_zip using _zip_to_state_county
drop if _merge==2
drop _merge
/* Where the state or county is missing, carry forward the value from the
   preceding row in dest_zip order. */
sort dest_zip
replace dest_county=dest_county[_n-1] if dest_county==.
replace dest_state=dest_state[_n-1] if dest_state==.
gen dest_state_county=dest_state*1000+dest_county
keep if mu==1 & same_zip==0
/* Restrict to the observations saved in the table3 file. The path uses the
   same separator as the save command that writes that file. */
merge 1:1 lbdnum dest_zip using  `esampfolder'/table3, keepusing()
keep if _merge==3
drop _merge
/* Cluster identifiers that combine a destination geography with the
   industry code; stored as double so the combined numbers are held exactly. */
gen double naics_dest_state=dest_state*1e7+naicsup
gen double naics_dest_county=dest_state_county*1e7+naicsup
gen double naics_dest_zip=dest_zip*1e7+naicsup

xtset lbdnum
/* Each regression is followed by a flag for its estimation sample. */
xtreg absorb_ratio lmileage2_mr firmfrac_mr, fe rob
gen esampA=e(sample)
areg absorb_ratio lmileage2_mr firmfrac_mr, a(lbdnum) cluster(dest_state)
gen esampB=e(sample)
areg absorb_ratio lmileage2_mr firmfrac_mr, a(lbdnum) cluster(naics_dest_state)
gen esampC=e(sample)
areg absorb_ratio lmileage2_mr firmfrac_mr, a(lbdnum) cluster(naics_dest_county)
gen esampD=e(sample)
areg absorb_ratio lmileage2_mr firmfrac_mr, a(lbdnum) cluster(naics_dest_zip)
gen esampE=e(sample)
areg absorb_ratio lmileage2_mr firmfrac_mr, a(lbdnum) cluster(dest_state_county)
gen esampF=e(sample)
areg absorb_ratio lmileage2_mr firmfrac_mr, a(lbdnum) cluster(dest_zip)
gen esampG=e(sample)
/* Output for the disclosure-review program.
   NOTE(review): this path has no separator after the folder macro, unlike
   the save commands in the earlier blocks; confirm how the macro is defined. */
keep lbdnum dest_zip firmid emp esamp* pay
compress
save `esampfolder'alternate_cluster.dta, replace

}

if `shipments_per_est'==1 {
  /* Columns 4-6 of Table 10 of the paper.
     Step 1 counts the records per lbdnum in cfs2007cleaned.
     Step 2 interacts distance and firm fraction with an indicator for
     establishments that have at least 100 such records, and re-estimates the
     baseline specifications with the interactions. */

  /* ---- Step 1: number of CFS records per establishment ---- */
  use  cfs2007cleaned.dta, replace
  bysort lbdnum: generate shipments=_N
  drop if lbdnum==""
  bysort lbdnum : keep if _n==1
  keep lbdnum shipments
  destring lbdnum, replace force
  save shipments_by_establishment, replace
  
  /* ---- Step 2: analysis data with high-shipment interactions ---- */
  use firmid pay emp  theta_i theta_d mu orig_zip naicsup shipment dest_zip lmileage2* firmfrac* same_zip   lbdnum absorb_ratio  using analysis_data2007, replace
  merge m:1 lbdnum using shipments_by_establishment
  keep if _merge==3
  drop _merge
  generate shipmentHigh=(shipments>=100)
  foreach base in lmileage2 firmfrac {
	generate `base'S=`base'*shipmentHigh
  }

  /* For each interaction build two adjusted versions:
       _mrW : subtract theta-weighted sums by (dest_zip, industry) and
              (orig_zip, industry), add back the doubly weighted industry sum
       _mr  : the same adjustment using simple means */
  foreach term in lmileage2S firmfracS {
	egen MRDIS1`term' = sum(theta_i*`term'), by(dest_zip `indvar')
	egen MRDIS2`term' = sum(theta_d*`term'), by(orig_zip `indvar')
	egen MRDIS3`term' = sum(theta_i*theta_d*`term'), by(`indvar')
	generate `term'_mrW=`term'-(MRDIS1`term'+MRDIS2`term'-MRDIS3`term')
	drop MRDIS1`term' MRDIS2`term' MRDIS3`term'

	egen MRDIS1`term' = mean(`term'), by(dest_zip `indvar')
	egen MRDIS2`term' = mean(`term'), by(orig_zip `indvar')
	egen MRDIS3`term' = mean(`term'), by(`indvar')
	generate `term'_mr=`term'-(MRDIS1`term'+MRDIS2`term'-MRDIS3`term')
	drop MRDIS1`term' MRDIS2`term' MRDIS3`term'
  }
  
  /* Estimation sample: mu==1, different-zip pairs that also appear in table3. */
  keep if mu==1 & same_zip==0
  merge 1:1 lbdnum dest_zip using  `esampfolder'table3, keepusing()
  keep if _merge==3
  drop _merge

  xtset lbdnum
  /* (A) unadjusted regressors */
  xtpoisson absorb_ratio lmileage2 firmfrac lmileage2S firmfracS , fe robust
  generate esampA=e(sample)
  /* (B) mean-adjusted regressors */
  xtpoisson absorb_ratio lmileage2_mr firmfrac_mr lmileage2S_mr firmfracS_mr , fe robust
  generate esampB=e(sample)
  /* (C) theta-weighted adjusted regressors */
  xtpoisson absorb_ratio lmileage2_mrW firmfrac_mrW lmileage2S_mrW firmfracS_mrW , fe robust
  generate esampC=e(sample)

  keep lbdnum dest_zip firmid emp esamp* pay shipmentHigh
  compress
  save `esampfolder'shipments_per_est.dta, replace

}

/* This portion of the code corresponds to columns 2-3 of Table 14 of the paper.
   Step 1 computes, for each naics code in cfs2007, the share of weighted shipment
   value (itm_fd_shipmt_val*bwhiqj_cfs) that is flagged as an export.
   Step 2 splits the baseline sample at an export share of 0.1 and runs the
   baseline regression on each half. */
if `noexports'==1 {
   use naics itm_fd_shipmt_val bwhiqj_cfs itm_fl_exp_yn using cfs2007, clear  
   gen vs=itm_fd_shipmt_val*bwhiqj_cfs /*$ as units*/
   bys naics: egen sum_vs=sum(vs)
   bys naics: egen sum_exports=sum(vs*(itm_fl_exp_yn=="Y"))
   gen frac_exports=sum_exports/sum_vs
   by naics: keep if _n==1
   /* force: non-numeric naics codes become missing rather than stopping the run */
   destring naics, replace force
   ren naics naicsup
   keep naicsup frac_exports
   save exports_by_naics, replace
   use pay emp firmid mu orig_zip  naicsup shipment dest_zip lmileage2_mr firmfrac_mr same_zip  lmileage2  lbdnum absorb_ratio using analysis_data2007, replace
    merge n:1 naicsup using exports_by_naics
    drop if same_zip==1
    /* mu is missing on rows that exist only in exports_by_naics, so this also removes them */
    drop if mu~=1
    drop _merge
    merge 1:1 lbdnum dest_zip using  `esampfolder'table3, keepusing()
    keep if _merge==3
    drop _merge

    /* frac_exports is missing for industries with no match in exports_by_naics
       (or with sum_vs==0). A bare (frac_exports<=.1) evaluates to 0 for those
       rows and would place them in the high-export sample; the qualifier leaves
       in_sample missing instead, so they enter neither regression. */
    gen in_sample=(frac_exports<=.1) if frac_exports<.
    xtset lbdnum
    /* Industries with an export share of at most 0.1 */
    xtpoisson absorb_ratio lmileage2_mr firmfrac_mr if in_sample==1, fe  rob
    gen esampA=e(sample)
    /* Industries with an export share above 0.1 */
    xtpoisson absorb_ratio lmileage2_mr firmfrac_mr if in_sample==0, fe  rob
    gen esampB=e(sample)
    keep lbdnum dest_zip firmid emp esamp* pay 
    compress
    save `esampfolder'no_exports.dta, replace
}

/* This portion of the code corresponds to Table 13 of the paper.
   It has two parts:
     Part 1 re-estimates the baseline regressions using CFS sampling weights
            (output: `esampfolder'different_weights_1.dta).
     Part 2 rebuilds the firm-fraction regressor from payroll (firmpayfrac =
            firm_sum_pay/sum_pay) and re-estimates with it
            (output: `esampfolder'different_weights_2.dta).
   Scratch files written along the way: cfs`yr'_sampl_weight, firmnumest_`yr'_,
   firm_num_est_`yr'_, temp, temp_out, gen_firmfrac_, gen_firmfrac_1. */
if `different_weights'==1 {

   local yr=2007
   local preyr=2006

   /* ---- Part 1a: mean CFS weight per cfn, non-export records only ---- */
   use cfs`yr', replace
   drop if itm_fl_exp_yn=="Y"
   /* cfn is the first 10 characters of the CFS id */
   gen cfn=substr(id,1,10)
   collapse (mean) bwhiqj_cfs , by(cfn)
   ren bwhiqj_cfs sampl_weight
   save cfs`yr'_sampl_weight, replace

    /* ---- Part 1b: map cfn to lbdnum and total the weights per lbdnum ---- */
    use lbd`preyr'`yr'naics, clear
    /* keep the highest-pay record within each (yr, cfn) */
    gsort yr cfn -pay
    by yr cfn: keep if _n==1
    /* when a cfn appears more than once, keep only its `yr' record so cfn is unique */
    bys cfn: gen ni=_N
    drop if ni>=2 & yr!=`yr'
    drop ni
    merge 1:1 cfn using cfs`yr'_sampl_weight
    keep if _merge==3
    keep lbdnum cfn sampl_weight
    destring lbdnum , replace
    /* NOTE(review): sampl_weig below (and sampl_wei further down) resolve to
       sampl_weight through variable-name abbreviation; this fails if varabbrev is off. */
    collapse (sum) sampl_weig, by(lbdnum )
    /* the per-lbdnum file overwrites the per-cfn file saved above */
    save cfs`yr'_sampl_weight, replace

    /* ---- Part 1c: weighted baseline regressions ---- */
    use pay emp firmid same_zip absorb_ratio lmileage2_mr firmfrac_mr firmfrac firmfrac_mrW lmileage2 lmileage2_mrW lbdnum dest_zip  mu if mu==1 & same_zip==0 using analysis_data2007, replace
    merge n:1 lbdnum using cfs`yr'_sampl_weight
    drop if _merge==2
    /* establishments without a weight get the mean weight of the matched rows */
    sum sampl_wei if _merge==3
    replace sampl_wei=r(mean) if _merge==1
    drop _merge mu same_zip
    merge 1:1 lbdnum dest_zip using  `esampfolder'table3, keepusing()
    keep if _merge==3
    xtset lbdnum
    /* esampA: _mr regressors; esampC: _mrW regressors; esampB: unadjusted regressors */
    xtpoisson absorb_ratio lmileage2_mr firmfrac_mr [w=sampl_wei], fe robust
      gen esampA=e(sample)
    xtpoisson absorb_ratio lmileage2_mrW firmfrac_mrW [w=sampl_wei], fe robust
   gen esampC=e(sample)
   xtpoisson absorb_ratio lmileage2 firmfrac [w=sampl_wei], fe robust
    gen esampB=e(sample)
    keep lbdnum dest_zip firmid emp esamp* pay
    compress
    save `esampfolder'different_weights_1.dta, replace

   
   /* ---- Part 2a: establishment count and payroll per
           (dest_zip, downstream industry, firmid, firm_at_ma) ---- */
   use lbd`preyr'`yr'naics, clear
   drop if yr!=`yr' |  firmid==""
   keep firmid* lbdnum zip `indvar' pay
   gen dest_zip=substr(zip,1,5)
   destring firmid* lbdnum dest_zip, force replace
   /* the establishment's own industry plays the downstream role here */
   rename `indvar' `indvardown'

   /* park this file's firmid2002 so the copy arriving from im_`yr'plants_4 can be
      dropped and the original restored */
   ren firmid2002 firmid2002t
   merge 1:1 lbdnum using im_`yr'plants_4
   drop firmid2002
   ren firmid2002t firmid2002
   gen merge_ind=1 if _merge==3
   /* firm_at_ma = the firmid recorded for the year given by mrgeyr (2002-2006);
      it stays missing when mrgeyr is outside that range */
   gen double firm_at_ma=.
   forvalues x=2002/2006 {
     replace firm_at_ma=firmid`x' if mrgeyr==`x'
   }

   drop if dest_zip==. | `indvardown'==.
   sort dest_zip `indvardown' firmid firm_at_ma
   by dest_zip `indvardown' firmid firm_at_ma: gen firm_num_est=_N
   by dest_zip `indvardown' firmid firm_at_ma: egen firm_sum_pay=sum(pay)
   collapse (max) firm_num_est firm_sum_pay , by(dest_zip `indvardown' firmid  firm_at_ma)
   save firmnumest_`yr'_, replace

     /* ---- Part 2b: expand through the industry-pair table (naicsTable`pctlink'.csv)
             and total by upstream industry ---- */
     insheet using naicsTable`pctlink'.csv, clear
     joinby `indvardown' using firmnumest_`yr'_
     collapse (sum) firm_sum_pay firm_num_est, by(dest_zip `indvar' firmid  firm_at_ma)
     save firm_num_est_`yr'_, replace 
  
  /* ---- Part 2c: rebuild the establishment x destination-zip frame ---- */
  use cfs`yr'cleaned, clear
  drop if lbdnum==""
  drop firmid `indvar' bestsic bestnaics
  save temp, replace
  
  use lbd`preyr'`yr'naics, clear
  /* one record per lbdnum: highest pay within year, preferring the `yr' record */
  gsort yr lbdnum -pay
  by yr lbdnum: keep if _n==1
  bys lbdnum: gen ni=_N
  drop if ni>=2 & yr!=`yr'
  drop ni
  merge 1:m lbdnum using temp
  drop if _merge~=3
  drop _merge
  keep lbdnum firmid* county zip orig_zip mu emp pay naicsup 
  gsort lbdnum -pay
  by lbdnum: keep if _n==1
  destring lbdnum firmid, force replace
  /* zipbank_vs_`yr' and num_est_`yr' are not created in this section for `yr'=2007;
     presumably they come from data_compile.do -- TODO confirm */
  merge 1:m lbdnum using zipbank_vs_`yr'
  drop if _merge~=3
  /*unmatched due to missing naicsup*/ 
  drop _merge
  merge m:1 `indvar' dest_zip using num_est_`yr'
  replace num_est=0 if _merge==1
  replace sum_pay=0 if _merge==1
  /*unmatched due to not in CFS*/
  drop if _merge==2
  drop _merge

  merge m:1 firmid dest_zip using firmtotest`yr'
  drop if _merge==2
  drop _merge
  save temp_out, replace

  /* ---- Part 2d: attach firm-level payroll/counts, then total per (lbdnum, dest_zip) ---- */
  use  temp_out, replace
  joinby `indvar' dest_zip firmid using firm_num_est_`yr'_, unmatched(master)
  drop _merge
  gsort lbdnum dest_zip -pay
  save gen_firmfrac_, replace

  use firm_sum_pay firm_num_est lbdnum dest_zip using gen_firmfrac_, replace
  sort lbdnum dest_zip
  by lbdnum dest_zip: egen firm_sum_pay_=sum(firm_sum_pay)
  by lbdnum dest_zip: egen firm_num_est_=sum(firm_num_est)
  by lbdnum dest_zip: keep if _n==1
  keep firm_sum_pay_ firm_num_est_ lbdnum dest_zip
  ren firm_sum_pay_ firm_sum_pay
  ren firm_num_est_ firm_num_est 
  save gen_firmfrac_1, replace

   /* ---- Part 2e: payroll-based firm fraction and regressions ---- */
   use pay emp firmid firmfrac firmfrac_mr same_zip orig_zip naicsup theta_i theta_d absorb_ratio lmileage2*  lbdnum dest_zip  firmnumest sum_pay num_est firmsumpay mu  using analysis_data2007, replace
    merge 1:1 lbdnum dest_zip  using gen_firmfrac_1, keep(1 3)
   /* compare the rebuilt establishment count with the one stored in analysis_data2007 */
   sum firmnumest firm_num_est
   cor firmnumest firm_num_est
   gen firmpayfrac=firm_sum_pay/sum_pay
   replace firmpayfrac=0 if sum_pay==0 | sum_pay==.
   /* _mrW: theta-weighted adjustment; _mr: simple-mean adjustment */
   foreach v of varlist firmpayfrac {
        egen MRDIS1`v' = sum(theta_i*`v'), by(dest_zip `indvar')
	egen MRDIS2`v' = sum(theta_d*`v'), by(orig_zip `indvar')
	egen MRDIS3`v' = sum(theta_i*theta_d*`v'), by(`indvar')
	gen `v'_mrW=`v'-(MRDIS1`v'+MRDIS2`v'-MRDIS3`v')
	drop MR*
        egen MRDIS1`v' = mean(`v'), by(dest_zip `indvar')
	egen MRDIS2`v' = mean(`v'), by(orig_zip `indvar')
	egen MRDIS3`v' = mean(`v'), by(`indvar')
	gen `v'_mr=`v'-(MRDIS1`v'+MRDIS2`v'-MRDIS3`v')
	drop MR*
     }
   
   /* adjusted values that came out missing are set to 0 */
   replace firmpayfrac_mr=0 if firmpayfrac_mr==.
   replace firmpayfrac_mrW=0 if firmpayfrac_mrW==.
   
   xtset lbdnum
   drop if mu!=1
   drop if same_zip==1
   drop _merge
   merge 1:1 lbdnum dest_zip using  `esampfolder'table3, keepusing()
   keep if _merge==3
   
   /* NOTE(review): the esamp letters are assigned differently from Part 1
      (here esampB is the _mrW run and esampC the unadjusted run). */
   xtpoisson absorb_ratio lmileage2_mr firmpayfrac_mr, fe robust
   gen esampA=e(sample)
   xtpoisson absorb_ratio lmileage2_mrW firmpayfrac_mrW, fe robust
   gen esampB=e(sample)
   xtpoisson absorb_ratio lmileage2 firmpayfrac, fe robust
   gen esampC=e(sample)

   keep lbdnum dest_zip firmid emp esamp* pay
   compress
   save `esampfolder'different_weights_2.dta, replace
   
}


/* Final column of Table 14 of the paper: the baseline regression with
   destinations aggregated from zip codes to state-county cells. */
if `countyregs'==1 {

/* ---- Step 1: zip -> (state, county) lookup built from lbd2007 ----
   For every zip keep the state/county combination that accounts for the
   largest share of its lbd2007 records. */
use lbdnum county* state* zip* using lbd2007, replace
rename state dest_state
rename county dest_county
generate dest_zip=substr(zip,1,5)
destring dest_* , replace
bysort dest_zip dest_state dest_county: generate ct=_N
bysort dest_zip : generate ct_=_N
generate sh = ct/ct_
gsort dest_zip -sh
by dest_zip : keep if _n==1
keep dest_zip dest_county dest_state
save _zip_to_state_county, replace

/* ---- Step 2: attach state/county to the analysis data ---- */
use pay firmid emp  mu same_zip naicsup lmileage2 lbdnum orig_zip dest_zip shipment tot_ship_d firmnumest num_est using analysis_data2007, replace
merge m:1 dest_zip using _zip_to_state_county
drop if _merge==2
/* Zips absent from the lookup inherit the state/county of the preceding row
   once the data are ordered by dest_zip. */
sort dest_zip
replace dest_county=dest_county[_n-1] if dest_county==.
replace dest_state=dest_state[_n-1] if dest_state==.
generate dest_state_county=dest_state*1000+dest_county

/* ---- Step 3: totals per establishment x state-county ----
   s1 = firmnumest, s2 = num_est, s3 = shipment, s4 = tot_ship_d.
   The totals are taken before same-zip rows are removed. */
local k = 0
foreach src in firmnumest num_est shipment tot_ship_d {
	local ++k
	bysort lbdnum dest_state_county: egen s`k'=sum(`src')
}

drop if same_zip==1

collapse (mean) s1-s4 lmileage2 mu  , by(lbdnum dest_state_county orig_zip naicsup firmid emp pay)
generate firmfrac=s1/s2
generate absorb_ratio=s3/s4

replace firmfrac=0 if firmfrac==.
xtset lbdnum

/* Mean-adjusted regressors, with state-county as the destination unit. */
foreach term in lmileage2 firmfrac {
	egen MRDIS1`term' = mean(`term'), by(dest_state_county naicsup)
	egen MRDIS2`term' = mean(`term'), by(orig_zip naicsup)
	egen MRDIS3`term' = mean(`term'), by(naicsup)
	generate `term'_mr=`term'-(MRDIS1`term'+MRDIS2`term'-MRDIS3`term')
	drop MR*
}
keep if mu==1

/* Report how many cells sit exactly at 0 or 1, then store indicators for them. */
count
foreach y in firmfrac absorb_ratio {
	forvalues b=0/1 {
		count if `y'==`b'
	}
}
foreach y in firmfrac absorb_ratio {
	forvalues b=0/1 {
		generate `y'`b'=(`y'==`b')
	}
}

/* (A) unadjusted regressors, (B) mean-adjusted regressors */
xtpoisson absorb_ratio lmileage2 firmfrac, fe  robust
generate esampA=e(sample)
xtpoisson absorb_ratio lmileage2_mr firmfrac_mr, fe  robust
generate esampB=e(sample)
keep lbdnum  dest_state_county firmid emp esamp* pay firmfrac0-absorb_ratio1
compress
save `esampfolder'county_regs.dta, replace

}

/* End of county robustness check */

if `get_firm_frac_2002'==1 { 

/* Builds firmfrac_2002.dta: for each (lbdnum, dest_zip), 2002 versions of the
   firm-fraction measures and their components, every variable suffixed _2002.
   additional_panel_regs merges this file onto the 2002/2007 panel.
   Scratch files written: firmnumest_2002, firmtotest2002,
   num_est_by_`indvardown'_2002, num_est_2002, firm_num_est_2002,
   zipbank_vs_2002, temp, temp_out, gen_firmfrac_2002. */

local yr=2002
local preyr=`yr'-1

/* ---- (1) establishments and payroll per (dest_zip, downstream industry, firmid) ---- */
use lbd`preyr'`yr'naics, clear
drop if yr!=`yr' |  firmid==""
keep firmid* lbdnum zip `indvar' pay
gen dest_zip=substr(zip,1,5)
destring firmid* lbdnum dest_zip, force replace
/* the establishment's own industry plays the downstream role here */
rename `indvar' `indvardown'
drop if dest_zip==. | `indvardown'==.
sort dest_zip `indvardown' firmid
by dest_zip `indvardown' firmid: gen firmnumest=_N
by dest_zip `indvardown' firmid: egen firmsumpay=sum(pay)
collapse (max) firmnumest firmsumpay , by(dest_zip `indvardown' firmid )
save firmnumest_`yr', replace
 
/* ---- (2) all establishments and payroll of each firm in each zip, any industry ---- */
use lbd`preyr'`yr'naics, clear
drop if lbdnum==""
keep lbdnum firmid zip yr pay
gen dest_zip=substr(zip,1,5)
destring dest_zip firmid, force replace
drop zip
gen firmtotest=1
ren pay firmsumpay
collapse (sum) firmtotest firmsumpay, by(firmid dest_zip yr)
drop if yr!=`yr' | firmid==.
save firmtotest`yr', replace

/* Get number of ds plants and total number of plants at dest_zip*/
use lbd`preyr'`yr'naics, clear
drop if yr!=`yr'  | firmid==""
keep zip `indvar' lbdnum pay
rename `indvar' `indvardown'
drop if lbdnum==""
gen dest_zip=substr(zip,1,5)
destring dest_zip, force replace
sort dest_zip `indvardown'
/* running count; the (max) in the collapse below turns it into the group size */
by dest_zip `indvardown': gen num_est=_n
by dest_zip `indvardown': egen sum_pay=sum(pay) 
by dest_zip: gen tot_num_est=_N
collapse (max) num_est sum_pay (mean) tot_num_est, by(dest_zip `indvardown')
save num_est_by_`indvardown'_`yr', replace

/* ---- (3) expand through the industry-pair table and total by upstream industry ---- */
insheet using naicsTable`pctlink'.csv, clear
joinby `indvardown' using num_est_by_`indvardown'_`yr'
collapse (sum) sum_pay  num_est (mean) tot_num_est, by(dest_zip `indvar')
save num_est_`yr', replace

insheet using naicsTable`pctlink'.csv, clear
joinby `indvardown' using firmnumest_`yr'
 if `yr'==2002 {
collapse (sum) firmsumpay firmnumest , by(dest_zip `indvar' firmid)
}
 /* yr is fixed at 2002 above, so this branch does not run in this section */
 if `yr'==2007 {
 collapse (sum) firmsumpay firmnumest im1-m_numpay (max) mrgeyr, by(dest_zip `indvar' firmid firm_at_ma)
}
 save firm_num_est_`yr', replace
 
/* ---- (4) CFS 2002 shipment value per (lbdnum, industry, dest_zip) ---- */
use cfs`yr'cleaned, clear
drop if lbdnum==""
destring firmid lbdnum dest_zip, force replace
collapse (sum) vs, by(lbdnum `indvar' dest_zip)
/* NOTE(review): the using file is the 2007 zipbank even though yr is 2002 --
   presumably so that the 2002 measures are defined on the 2007 set of
   (lbdnum, dest_zip) pairs; confirm this is intended. */
merge 1:1 lbdnum `indvar' dest_zip using zipbank_2007
/*unmatched due to missing naicsup*/
drop if _merge==1
drop _merge
save zipbank_vs_`yr', replace

/* ---- (5) one LBD record per establishment, joined to destinations and counts ---- */
use cfs`yr'cleaned, clear
drop if lbdnum==""
drop firmid `indvar' bestsic bestnaics
save temp, replace
use lbd`preyr'`yr'naics, clear
/* one record per lbdnum: highest pay within year, preferring the `yr' record */
gsort yr lbdnum -pay
by yr lbdnum: keep if _n==1
bys lbdnum: gen ni=_N
drop if ni>=2 & yr!=`yr'
drop ni
merge 1:m lbdnum using temp
drop if _merge~=3
drop _merge
keep lbdnum firmid* county zip orig_zip mu emp pay naicsup 
gsort lbdnum -pay
by lbdnum: keep if _n==1
destring lbdnum firmid, force replace
merge 1:m lbdnum using zipbank_vs_`yr'
drop if _merge~=3
/*unmatched due to missing naicsup*/
drop _merge
merge m:1 `indvar' dest_zip using num_est_`yr'
replace num_est=0 if _merge==1
replace sum_pay=0 if _merge==1
/*unmatched due to not in CFS*/
drop if _merge==2
drop _merge

merge m:1 firmid dest_zip using firmtotest`yr'
drop if _merge==2
drop _merge
save temp_out, replace

/* NOTE(review): temp_out already carries firmsumpay from firmtotest`yr' (the
   firm's payroll in the zip across all industries). joinby keeps the master's
   value when a variable exists in both files, so the industry-linked firmsumpay
   from firm_num_est_`yr' is not the one that feeds firmfrac3 below. The 2007
   code in the different_weights section uses distinct names (firm_sum_pay) and
   does not have this clash. Left unchanged here; confirm which is intended. */
use  temp_out, replace
joinby `indvar' dest_zip firmid using firm_num_est_`yr', unmatched(master)
save gen_firmfrac_2002, replace

/* ---- (6) firm-fraction measures ---- */
use gen_firmfrac_2002, clear
sort lbdnum dest_zip vs

/* make the firm-level components constant within (lbdnum, dest_zip) */
foreach var of varlist firmtotest firmsumpay firmnumest {
   by lbdnum dest_zip: egen t=max(`var')
   replace `var'=t
   drop t
}

drop zip
compress

/* firmfrac  : firm's linked establishments / all linked establishments in the zip
   firmfrac2 : as firmfrac, with the numerator set to 0 when dest_zip==orig_zip
   firmfrac3 : payroll analogue of firmfrac
   All three are 0 when the denominator is 0 or when the joinby found no match. */
gen firmnumest2=firmnumest
replace firmnumest2=0 if dest_zip==orig_zip  
gen firmfrac = firmnumest/num_est if num_est~=0
gen firmfrac2 = firmnumest2/num_est if num_est~=0
gen firmfrac3 = firmsumpay/sum_pay if sum_pay~=0
replace firmfrac=0 if num_est==0
replace firmfrac=0 if _merge==1
replace firmfrac2=0 if num_est==0
replace firmfrac2=0 if _merge==1
replace firmfrac3=0 if sum_pay==0
replace firmfrac3=0 if _merge==1
foreach var of varlist firmfrac* num_est sum_pay firmnumest* {
  ren `var' `var'_2002
}
/* num_est and sum_pay were just renamed, so they are listed under their new
   names; the bare names would resolve only through variable-name abbreviation
   and fail with varabbrev off. The set of variables kept is unchanged. */
keep lbdnum dest_zip firmfrac*  num_est_2002 sum_pay_2002 firmnumest*
save firmfrac_2002, replace
 
}

if `additional_panel_regs'==1 {

/* Panel regressions for Tables 4 and 8 of the paper.
   Inputs : panel_both0207.dta and firmfrac_2002.dta (the latter is written by
            the get_firm_frac_2002 section of this file).
   Outputs: `esampfolder'panel_esampA.dta (sample flags for the cross-section and
            control-function regressions) and `esampfolder'panel_esampB.dta
            (sample flags for the two-period long-format regressions). */

use pay emp firmid num_est naicsup lbdnum theta_i theta_d dest_zip orig_zip same_zip mu absorb_ratio* lmileage2 lmileage2_mr lmileage2_mrW firmfrac_mr firmfrac firmfrac_mrW   im2frac_mr  im1frac_mr  im3frac_mr   using  panel_both0207.dta, replace
ren num_est num_est_2007
merge 1:1 lbdnum dest_zip using firmfrac_2002
drop if _merge==2
drop _merge
/* pairs without 2002 information are treated as zeros */
replace firmfrac_2002=0 if firmfrac_2002==.
replace absorb_ratio_2002=0 if absorb_ratio_2002==.    

/* adjusted versions of the 2002 firm fraction: _mrW theta-weighted, _mr simple means */
foreach v of varlist  firmfrac_2002  {
        egen MRDIS1`v' = sum(theta_i*`v'), by(dest_zip `indvar')
	egen MRDIS2`v' = sum(theta_d*`v'), by(orig_zip `indvar')
	egen MRDIS3`v' = sum(theta_i*theta_d*`v'), by(`indvar')
	gen `v'_mrW=`v'-(MRDIS1`v'+MRDIS2`v'-MRDIS3`v')
	drop MR*
        egen MRDIS1`v' = mean(`v'), by(dest_zip `indvar')
	egen MRDIS2`v' = mean(`v'), by(orig_zip `indvar')
	egen MRDIS3`v' = mean(`v'), by(`indvar')
	gen `v'_mr=`v'-(MRDIS1`v'+MRDIS2`v'-MRDIS3`v')
	drop MR*
}

/* put the year at the end of each name so reshape long can use it below */
ren firmfrac_2002_mr  firmfrac_mr_2002
ren firmfrac_2002_mrW  firmfrac_mrW_2002
ren firmfrac firmfrac_2007
replace num_est_2002=0 if num_est_2002==. & num_est_2007~=.
gen inv_num_est_2002=1/(1+num_est_2002)
gen inv_num_est_2007=1/(1+num_est_2007)

sum inv_num_est_2002
sum inv_num_est_2007

drop if mu==0 | same_zip==1
ren firmfrac_mr firmfrac_mr_2007
ren absorb_ratio absorb_ratio_2007

xtset lbdnum

gen firmfrac_mr_diff=firmfrac_mr_2007-firmfrac_mr_2002
gen absorb_ratio_diff=absorb_ratio_2007-absorb_ratio_2002

/* These regressions appear in columns 1-3 of Table 4 */ 

xtpoisson absorb_ratio_2007 lmileage2_mr firmfrac_mr_2007 , fe rob
gen esamp_07=e(sample)

xtpoisson absorb_ratio_2007 lmileage2_mr firmfrac_mr_2007 absorb_ratio_2002 , fe rob
gen esamp_07A=e(sample)
xtpoisson absorb_ratio_2007 lmileage2_mr firmfrac_mr_2007 firmfrac_mr_2002 absorb_ratio_2002, fe rob
gen esamp_07B=e(sample)

/* Produce the first-stage regressions for Table 8 and Table 15 of the paper */
/* Each first stage is run on the esamp_07A sample for instrument im`x'frac_mr
   (x = 1, 2); its residual is stored for use as a control-function term in the
   second-stage regressions further down. */

forvalues x=1/2 {

xtreg firmfrac_mr_2007 lmileage2_mr im`x'frac_mr if  esamp_07A==1, fe 
predict errort2007`x', e
xtreg firmfrac_mr_2007 lmileage2_mr im`x'frac_mr absorb_ratio_2002 if  esamp_07A==1, fe 
predict errortA2007`x', e
xtreg firmfrac_mr_2007 lmileage2_mr im`x'frac_mr firmfrac_mr_2002 absorb_ratio_2002 if  esamp_07A==1, fe 
predict errortB2007`x', e
xtreg firmfrac_mr_diff im`x'frac_mr if  esamp_07A==1 , fe 
predict errortC2007`x', e

/* The absorb_ratio_2002 regression below uses errort2002`x', which was never
   generated, so that regression stopped the run with "variable not found".
   NOTE(review): this first stage is inferred by analogy with errort2007`x'
   above (same regressors, 2002 firm fraction as the outcome) -- confirm that
   this is the intended specification. */
xtreg firmfrac_mr_2002 lmileage2_mr im`x'frac_mr if  esamp_07A==1, fe 
predict errort2002`x', e

}

xtreg absorb_ratio_diff firmfrac_mr_diff if esamp_07A==1, fe rob

/* indicator for a positive 2007 absorb_ratio; missing when absorb_ratio_2007 is missing */
gen extensive=(absorb_ratio_2007>0) if absorb_ratio_2007~=.

/* These regressions appear in Table 8 of the paper */

forvalues x=1/2 {
  di "x= " `x'
  xtreg absorb_ratio_diff firmfrac_mr_diff errortC2007`x' if esamp_07A==1, fe rob
  gen esamp_07C`x'=e(sample)  
  xtpoisson absorb_ratio_2002 lmileage2_mr firmfrac_mr_2002 errort2002`x', fe rob
  gen esamp_07D`x'=e(sample)  
  xtpoisson absorb_ratio_2007 lmileage2_mr firmfrac_mr_2007 errort2007`x', fe rob
  gen esamp_07E`x'=e(sample)
  xtpoisson absorb_ratio_2007 lmileage2_mr firmfrac_mr_2007 errortA2007`x' absorb_ratio_2002 , fe rob
  gen esamp_07F`x'=e(sample)
  xtpoisson absorb_ratio_2007 lmileage2_mr firmfrac_mr_2007 errortB2007`x' firmfrac_mr_2002 absorb_ratio_2002, fe rob
  gen esamp_07G`x'=e(sample)
  xtpoisson absorb_ratio_2007 lmileage2_mr firmfrac_mr_2007 errortB2007`x' firmfrac_mr_2002 absorb_ratio_2002 if extensive==1, fe rob
  gen esamp_07H`x'=e(sample)  
  xtpoisson extensive lmileage2_mr firmfrac_mr_2007 errortB2007`x' firmfrac_mr_2002 absorb_ratio_2002 , fe rob
  gen esamp_07I`x'=e(sample)  

}

/* save the sample flags without disturbing the data needed for the reshape below */
preserve

keep lbdnum dest_zip firmid emp esamp* pay extensive
compress
save `esampfolder'panel_esampA.dta, replace

restore

/* Long format: one row per (lbdnum, dest_zip, year), year taken from the
   2002/2007 suffixes of absorb_ratio_ and firmfrac_mr_. */
keep extensive absorb_ratio_2007 absorb_ratio_2002 firmfrac_mr_2002 firmfrac_mr_2007 lbdnum dest_zip im2frac_mr  lmileage2_mr pay emp firmid 
reshape long absorb_ratio_ firmfrac_mr_ , i(lbdnum dest_zip) j(year)
/* pair identifier, used as the panel unit for pair fixed effects */
gen double id=lbdnum*1e6+dest_zip

xtset id
/* This is the regression of column 4 of Table 4 */
xtpoisson absorb_ratio_ firmfrac_mr_, fe rob
gen esamp_Panel_A=e(sample)

/* same long data with establishment (lbdnum) fixed effects */
xtset lbdnum 
xtpoisson absorb_ratio_ firmfrac_mr_, fe rob
xtpoisson absorb_ratio_ firmfrac_mr_ lmileage2_mr, fe rob
gen esamp_Panel_B=e(sample)

keep lbdnum dest_zip firmid emp esamp* pay
compress
save `esampfolder'panel_esampB.dta, replace

}

/* } */ 

/* End of panel compile */

if `subsample_big_zips'==1 {
/* Table 11 of the paper: the baseline regression restricted to destination
   zips with num_est of at least 5 and at least 10. Saves the two
   estimation-sample flags esamp5 and esamp10. */
use firmid pay emp num_est mu same_zip absorb_ratio lbdnum dest_zip firmfrac_mr lmileage2_mr if mu==1 & same_zip==0 using analysis_data2007, replace

/* keep only the (lbdnum, dest_zip) pairs that appear in table3 */
merge m:1 lbdnum dest_zip using  `esampfolder'table3, keepusing()
keep if _merge==3
capture drop _merge

generate inv_num_est=1/(1+num_est)
xtset lbdnum

/* one regression per size threshold */
foreach k in 5 10 {
	xtpoisson absorb_ratio firmfrac_mr lmileage2_mr if num_est>=`k' , fe robust
	generate esamp`k'=e(sample)
}

/* summarize inv_num_est within each estimation sample */
foreach k in 5 10 {
	summarize inv_num_est if esamp`k'==1
}

keep lbdnum dest_zip firmid emp esamp* pay
compress
save `esampfolder'subsample_big_zips.dta, replace

}


if `extensive'==1 {

/* Column 4 of Table 14 and columns 4-5 of Table 3 of the paper.
   Part A keeps same-zip pairs in the sample and controls for same_zip.
   Part B restricts to different-zip pairs that appear in table3 and splits the
   outcome into an extensive margin (any positive absorb_ratio) and an
   intensive margin (absorb_ratio given that it is positive). */
use naicsup same_zip emp pay num_est firmfrac firmid orig_zip lbdnum dest_zip lmileage2_mr firmfrac_mr firmfrac_mrW absorb_ratio mu using analysis_data2007, replace

/* firmfracS: firmfrac with 1/num_est subtracted when origin and destination
   zip coincide and firmfrac is positive */
generate firmfracS=firmfrac
replace firmfracS=firmfrac-1/num_est if orig_zip==dest_zip & (firmfrac>0 & firmfrac<.)
replace firmfracS=0 if firmfracS==. & firmfrac!=.

/* mean-adjusted versions of same_zip and firmfracS */
foreach term in same_zip firmfracS {
	egen MRDIS1`term' = mean(`term'), by(dest_zip `indvar')
	egen MRDIS2`term' = mean(`term'), by(orig_zip `indvar')
	egen MRDIS3`term' = mean(`term'), by(`indvar')
	generate `term'_mr=`term'-(MRDIS1`term'+MRDIS2`term'-MRDIS3`term')
	drop MR*
}

keep if mu!=0

/* ---- Part A: same-zip pairs included ----
   For each firm-fraction measure, first with raw same_zip, then with the
   adjusted same_zip_mr; the sample flag is taken from the second regression. */
xtset lbdnum
local j = 0
foreach ff in firmfracS_mr firmfrac_mr {
	local ++j
	xtpoisson absorb_ratio lmileage2_mr `ff' same_zip, fe robust
	xtpoisson absorb_ratio lmileage2_mr `ff' same_zip_mr, fe robust
	generate esampA`j'=e(sample)
}

/* ---- Part B: extensive and intensive margins ---- */
merge 1:1 lbdnum dest_zip using  `esampfolder'table3, keepusing() keep(master match)

generate extensive=(absorb_ratio>0) if absorb_ratio!=.
xtpoisson extensive lmileage2_mr firmfrac_mr if same_zip==0 & _merge==3, fe robust
generate esampB=e(sample)
xtpoisson absorb_ratio lmileage2_mr firmfrac_mr if extensive==1 & same_zip==0 & _merge==3, fe robust
generate esampC=e(sample)

keep lbdnum dest_zip firmid emp esamp* pay extensive same_zip
compress
save `esampfolder'extensive.dta, replace
}


if `inter_cf'==1 {
    /* This portion of the code corresponds to Table 15 of the paper.
       Control-function regressions in which firmfrac_mr and its interaction
       with distance are both instrumented. Two versions of the interaction
       are used:
         inter_mr_2 : product of the raw variables, then mean-adjusted
                      (instrument interaction: im`x'frac_inter_mr_2)
         inter_mr   : product of the adjusted variables, with distance centred
                      at its mean (instrument interaction: im`x'frac_inter_mr)
       Output: `esampfolder'inter_cf.dta with sample flags esampA-esampD for
       each instrument x = 1, 2, 3. */
use  firmid pay emp mu orig_zip  naicsup shipment dest_zip lmileage2* firmfrac* same_zip im1frac* im2frac* im3frac*  lbdnum absorb_ratio  using analysis_data2007, replace

/* ---- interactions built from the adjusted variables ---- */
sum lmileage2_mr, meanonly
gen lmileage2_mr_c = lmileage2_mr - r(mean)
gen inter_mr=lmileage2_mr_c*firmfrac_mr
/* The second set of regressions below refers to im`x'frac_inter_mr for
   x = 1, 2, 3, but only imfrac_inter_1_mr and imfrac_inter_2_mr were ever
   generated. Through variable-name abbreviation those references resolved to
   im`x'frac_inter_mr_2, so that set reused the first set's instrument
   interaction. The variables are now created under the names the regressions
   use, for all three instruments. The confirm guard skips the gen if
   analysis_data2007 already supplies a variable of exactly that name. */
forvalues x=1/3 {
	capture confirm variable im`x'frac_inter_mr, exact
	if _rc {
		gen im`x'frac_inter_mr=lmileage2_mr_c*im`x'frac_mr
	}
}

/* ---- interactions built from the raw variables, then mean-adjusted ---- */
gen inter=lmileage2*firmfrac
egen mean1 = mean(inter) , by(dest_zip naicsup)
egen mean2 = mean(inter) , by(orig_zip naicsup)
egen mean3 = mean(inter) , by(naicsup)
gen inter_mr_2=inter-mean1-mean2+mean3
drop mean1 mean2 mean3

forvalues x=1/3 {
gen im`x'frac_inter=lmileage2*im`x'frac
egen mean1 = mean(im`x'frac_inter) , by(dest_zip naicsup)
egen mean2 = mean(im`x'frac_inter) , by(orig_zip naicsup)
egen mean3 = mean(im`x'frac_inter) , by(naicsup)
gen im`x'frac_inter_mr_2=im`x'frac_inter-mean1-mean2+mean3
drop mean1 mean2 mean3
}


drop if mu!=1
drop if same_zip==1

merge n:1 lbdnum dest_zip using  `esampfolder'table3, keepusing()
keep if _merge==3

/* ---- Set 1: interaction = inter_mr_2 ----
   errort`x'  : residual of the firmfrac_mr first stage (both instruments)
   errort`x'2 : residual of the inter_mr_2 first stage (both instruments)
   errort`x'1 : residual of the firmfrac_mr first stage without the
                instrument interaction */
forvalues x = 1/3 {
	xtset lbdnum
	xtreg firmfrac_mr im`x'frac_mr lmileage2_mr im`x'frac_inter_mr_2, fe 
	predict errort`x', e
        
	xtreg inter_mr_2  im`x'frac_mr  lmileage2_mr im`x'frac_inter_mr_2, fe 
        predict errort`x'2, e

    	xtreg firmfrac_mr im`x'frac_mr lmileage2_mr , fe 
	predict errort`x'1, e
        
	xtpoisson absorb_ratio lmileage2_mr firmfrac_mr inter_mr_2 errort`x' errort`x'2, fe robust
        gen esampA`x'=e(sample)
	xtpoisson absorb_ratio lmileage2_mr firmfrac_mr inter_mr_2 errort`x'1 , fe robust
        gen esampB`x'=e(sample)
}

/* clear the residuals so the same names can be reused for the second set */
drop error*

/* ---- Set 2: interaction = inter_mr, instrument interaction = im`x'frac_inter_mr ---- */
forvalues x = 1/3 {
	xtset lbdnum
	xtreg firmfrac_mr im`x'frac_mr lmileage2_mr im`x'frac_inter_mr, fe 
	predict errort`x', e
        
	xtreg inter_mr  im`x'frac_mr  lmileage2_mr im`x'frac_inter_mr, fe 
        predict errort`x'2, e

    	xtreg firmfrac_mr im`x'frac_mr lmileage2_mr , fe 
	predict errort`x'1, e
        
	xtpoisson absorb_ratio lmileage2_mr firmfrac_mr inter_mr errort`x' errort`x'2, fe robust
        gen esampC`x'=e(sample)
	xtpoisson absorb_ratio lmileage2_mr firmfrac_mr inter_mr errort`x'1 , fe robust
        gen esampD`x'=e(sample)
 }



keep lbdnum dest_zip firmid emp esamp* pay
compress
save `esampfolder'inter_cf.dta, replace

}

if `compute_specialization'==1 {
	/* Computes the number that appears in footnote 8 of the paper: a summary
	   of, per establishment, the largest share of weighted non-export shipment
	   value held by a single itm_ff_sctg_cde code, over the establishments
	   that appear in table3. */
   use cfs2007, replace
   drop if itm_fl_exp_yn=="Y"
   generate vs=itm_fd_shipmt_val*bwhiqj_cfs /*$ as units*/
   generate cfn=substr(id,1,10)

   /* value per (cfn, commodity code) and per cfn, and the resulting share */
   bysort cfn itm_ff_sctg_cde: egen sum_val_sctg=sum(vs)
   by cfn : egen sum_val=sum(vs)
   generate share=sum_val_sctg/sum_val

   /* keep the largest share for each cfn */
   gsort cfn -share
   by cfn: keep if _n==1
   keep share cfn 

   /* map cfn to lbdnum, one row per lbdnum */
   merge 1:m cfn using cfs2007cleaned
   keep lbdnum share
   bysort lbdnum: keep if _n==1
   destring lbdnum , replace force
   drop if lbdnum==.

   /* drop establishments that are not in table3, then summarize once per establishment */
   merge 1:m lbdnum using  `esampfolder'table3
   drop if _merge==1
   bysort lbdnum : generate fi=1 if _n==1
   summarize share if fi==1
}

if `include_mu_0'==1 {

/* Final column of Table 9 of the paper. Unlike the other sections, no
   restriction on mu is applied here; only same-zip pairs are removed. mu is
   kept in the saved file. */
use pay firmid emp  lbdnum dest_zip lmileage2 lmileage2_mrW lmileage2_mr firmfrac firmfrac_mr firmfrac_mrW same_zip absorb_ratio  mu  using analysis_data2007, replace
keep if same_zip!=1
xtset lbdnum

/* unadjusted, mean-adjusted, and theta-weighted adjusted regressors, in that
   order; the sample flag is taken from the last regression */
xtpoisson absorb_ratio lmileage2 firmfrac, fe robust
xtpoisson absorb_ratio lmileage2_mr firmfrac_mr, fe robust
xtpoisson absorb_ratio lmileage2_mrW firmfrac_mrW, fe robust
generate esamp=e(sample)

keep lbdnum dest_zip firmid  emp pay  esamp mu
compress
save `esampfolder'include_mu_0.dta, replace

}
